library(pacman)
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
library(gmodels)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
# Load the property assessment CSV (file name indicates fiscal year 2023).
property_dataset <- read.csv("fy2023-property-assessment-data.csv")

# Keep only the columns used by the analysis below.
property_subset <- property_dataset[, c(
  "CITY", "ZIP_CODE", "LU", "LAND_SF", "LAND_VALUE", "TOTAL_VALUE",
  "YR_BUILT", "YR_REMODEL", "ROOF_STRUCTURE", "BED_RMS", "FULL_BTH",
  "KITCHENS", "TT_RMS", "HEAT_TYPE", "AC_TYPE", "PROP_VIEW"
)]

# Preview the first ten rows.
head(property_subset, n = 10)
## CITY ZIP_CODE LU LAND_SF LAND_VALUE TOTAL_VALUE YR_BUILT YR_REMODEL
## 1 EAST BOSTON 2128 R3 1150 195700 784200 1900 NA
## 2 EAST BOSTON 2128 R3 1150 196500 811800 1920 2000
## 3 EAST BOSTON 2128 R3 1150 197100 796300 1905 1985
## 4 EAST BOSTON 2128 R3 1150 197700 727900 1900 1991
## 5 EAST BOSTON 2128 R2 2010 225700 794800 1900 1978
## 6 EAST BOSTON 2128 R3 2500 261200 1288300 1900 2018
## 7 EAST BOSTON 2128 R3 2500 262000 1255200 1900 2009
## 8 EAST BOSTON 2128 R3 2500 262700 1139200 1900 NA
## 9 EAST BOSTON 2128 R3 2500 263200 876300 1900 1998
## 10 EAST BOSTON 2128 R3 2500 224400 1027200 1900 2020
## ROOF_STRUCTURE BED_RMS FULL_BTH KITCHENS TT_RMS HEAT_TYPE
## 1 F - Flat 6 3 3 12 W - Ht Water/Steam
## 2 F - Flat 3 3 3 9 F - Forced Hot Air
## 3 F - Flat 5 3 3 13 S - Space Heat
## 4 M - Mansard 5 3 3 11 W - Ht Water/Steam
## 5 M - Mansard 6 3 2 13 W - Ht Water/Steam
## 6 F - Flat 13 6 3 20 E - Electric
## 7 F - Flat 14 5 3 20 W - Ht Water/Steam
## 8 F - Flat 11 3 3 16 W - Ht Water/Steam
## 9 F - Flat 5 3 3 14 W - Ht Water/Steam
## 10 F - Flat 6 3 3 14 W - Ht Water/Steam
## AC_TYPE PROP_VIEW
## 1 N - None A - Average
## 2 C - Central AC A - Average
## 3 N - None A - Average
## 4 N - None A - Average
## 5 N - None A - Average
## 6 N - None A - Average
## 7 N - None A - Average
## 8 N - None A - Average
## 9 N - None A - Average
## 10 C - Central AC A - Average
# Print per-column summaries of the retained columns; the output below also
# reports the NA count for each numeric column that has missing values.
summary(property_subset)
## CITY ZIP_CODE LU LAND_SF
## Length:180627 Min. :2026 Length:180627 Min. : 100
## Class :character 1st Qu.:2119 Class :character 1st Qu.: 1000
## Mode :character Median :2127 Mode :character Median : 2014
## Mean :2130 Mean : 7816
## 3rd Qu.:2131 3rd Qu.: 4770
## Max. :2467 Max. :101513565
## NA's :3 NA's :7545
## LAND_VALUE TOTAL_VALUE YR_BUILT YR_REMODEL
## Min. : 0 Min. :0.000e+00 Min. : 1700 Min. : 201
## 1st Qu.: 0 1st Qu.:3.796e+05 1st Qu.: 1900 1st Qu.: 1987
## Median : 0 Median :6.096e+05 Median : 1920 Median : 2004
## Mean : 376579 Mean :1.500e+06 Mean : 1932 Mean : 2001
## 3rd Qu.: 225000 3rd Qu.:9.141e+05 3rd Qu.: 1964 3rd Qu.: 2015
## Max. :486046900 Max. :2.143e+09 Max. :20198 Max. :20220
## NA's :22930 NA's :95227
## ROOF_STRUCTURE BED_RMS FULL_BTH KITCHENS
## Length:180627 Min. : 0.00 Min. : 0.000 Min. :0.000
## Class :character 1st Qu.: 2.00 1st Qu.: 1.000 1st Qu.:1.000
## Mode :character Median : 3.00 Median : 1.000 Median :1.000
## Mean : 3.16 Mean : 1.351 Mean :1.053
## 3rd Qu.: 4.00 3rd Qu.: 2.000 3rd Qu.:1.000
## Max. :17.00 Max. :17.000 Max. :5.000
## NA's :48287 NA's :11116 NA's :11114
## TT_RMS HEAT_TYPE AC_TYPE PROP_VIEW
## Min. : 1.00 Length:180627 Length:180627 Length:180627
## 1st Qu.: 4.00 Class :character Class :character Class :character
## Median : 6.00 Mode :character Mode :character Mode :character
## Mean : 6.97
## 3rd Qu.: 9.00
## Max. :20.00
## NA's :48354
# Narrative fragment from the report text: IQR() and also removed
# values == 0
# (i.e. LAND_VALUE outliers are removed with the IQR rule, and zero land
# values are dropped as well).
iqr_land_value <- IQR(property_subset$LAND_VALUE, na.rm = TRUE)

# First and third quartiles, requested explicitly instead of being picked by
# position out of the default five-number quantile() result.
land_value_quartiles <- quantile(
  property_subset$LAND_VALUE,
  probs = c(0.25, 0.75),
  names = FALSE,
  na.rm = TRUE
)

# Define lower and upper bounds for outliers (1.5 * IQR beyond the quartiles)
lower_bound <- land_value_quartiles[1] - 1.5 * iqr_land_value
upper_bound <- land_value_quartiles[2] + 1.5 * iqr_land_value

# Remove outliers and zero values from LAND_VALUE in a single pass
data_cleaned <- property_subset %>%
  filter(
    LAND_VALUE >= lower_bound,
    LAND_VALUE <= upper_bound,
    LAND_VALUE != 0
  )
# One-column data frame holding the cleaned land values.
land_value <- data_cleaned["LAND_VALUE"]

# Histogram of land values in 100,000-wide bins; both axes use
# comma-separated tick labels.
gg_land_value <- ggplot(data = land_value, mapping = aes(x = LAND_VALUE)) +
  geom_histogram(binwidth = 100000, colour = "black", fill = "blue") +
  scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::comma) +
  labs(
    title = "Histogram of Land Values",
    x = "Land Values in Dollars",
    y = "Frequency"
  )

# Interactive version of the histogram.
plotly_hist <- ggplotly(gg_land_value)
plotly_hist
# Narrative fragment from the report text: ggplot() and plotly() and found
# that maximum Land Value is in the range
# NOTE(review): the range itself is missing from the recovered text.

# Creating a bar chart for number of houses per city using ggplot.
# fct_infreq() orders the cities from most to fewest rows. A sort key built
# as -table(CITY)[CITY] is NA for rows whose CITY is blank, because an empty
# string never matches a name in character indexing, so the blank category
# would be ordered by NA instead of by its count.
gg_hist <- ggplot(property_subset, aes(x = forcats::fct_infreq(CITY))) +
  geom_bar(fill = "blue", color = "black") +
  labs(
    title = "Number of houses per city ",
    x = "City",
    y = "Frequency of houses"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Using plotly for creating an interactive graph
plotly_hist <- ggplotly(gg_hist)
plotly_hist
# Narrative fragment from the report text: ggplot() and plotly().

# Land value per square foot for each city: total land value divided by total
# land area. Rows with a missing LAND_SF are excluded from BOTH sums so the
# numerator and denominator cover the same parcels (summary() above reports
# NA values in LAND_SF); otherwise their value would be counted with no
# matching area and inflate the ratio.
average_land_value_per_sqft <- property_subset %>%
  group_by(CITY) %>%
  summarize(
    Avg_Land_Value_per_Sqft =
      sum(LAND_VALUE[!is.na(LAND_SF)], na.rm = TRUE) /
        sum(LAND_SF, na.rm = TRUE)
  )

# Using table() to check for blank values
table(property_subset$CITY)
##
## ALLSTON BOSTON BRIGHTON
## 5 4423 47104 11870
## BROOKLINE CHARLESTOWN CHESTNUT HILL DEDHAM
## 24 7230 1017 6
## DORCHESTER EAST BOSTON HYDE PARK JAMAICA PLAIN
## 29212 9951 9207 12104
## MATTAPAN NEWTON READVILLE ROSLINDALE
## 4847 1 2 9210
## ROXBURY ROXBURY CROSSING SOUTH BOSTON WEST ROXBURY
## 6168 1832 15439 10975
# Narrative fragment from the report text: subset().
# Rows with a blank CITY are dropped before plotting (the narrative mentions
# subset(); the code uses filter()).
average_land_value_per_sqft <- filter(average_land_value_per_sqft, CITY != "")

# Using ggplot to plot bar graph for Average land value per sq ft.
# Bars are ordered from highest to lowest value and labelled with the value
# rounded to two decimals. vjust is a constant, so it is set outside aes().
ggplot(
  average_land_value_per_sqft,
  aes(
    x = reorder(CITY, -Avg_Land_Value_per_Sqft),
    y = Avg_Land_Value_per_Sqft
  )
) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Average Land Value Per Sqft by City",
    x = "City",
    y = "Average Land Value Per Sqft"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(
    labels = scales::dollar_format(prefix = "$"),
    limits = c(0, 200)
  ) +
  geom_text(
    aes(label = round(Avg_Land_Value_per_Sqft, 2)),
    vjust = -0.5,
    size = 3
  )
# Narrative fragment from the report text: ggplot() and found out that Boston
# has highest value at $169.56 per sqft

# Frequency of each roof structure, including blank entries.
table(property_subset$ROOF_STRUCTURE)
##
## F - Flat G - Gable H - Hip L - Gambrel M - Mansard
## 36420 67348 46495 13996 2154 13523
## O - Other S - Shed
## 343 348
# Drop rows whose roof structure is blank.
roof_structure_cleaned <- filter(property_subset, ROOF_STRUCTURE != "")

# Bar chart of roof structure counts, with the count printed above each bar.
ggplot(data = roof_structure_cleaned, mapping = aes(x = ROOF_STRUCTURE)) +
  geom_bar(fill = "lightblue") +
  geom_text(
    mapping = aes(label = after_stat(count)),
    stat = "count",
    vjust = -0.5
  ) +
  scale_y_continuous(limits = c(0, 80000)) +
  labs(
    title = "Distribution of Roof Structure",
    x = "Roof Structure",
    y = "Count"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
* We plotted a bar chart using ggplot() and observed that the most common
roof structure is F - Flat, followed by G - Gable.
# Build the year-built series: keep the single column, drop missing values,
# then discard the malformed five-digit years by keeping values below 3000.
year_built <- property_subset %>%
  subset(select = YR_BUILT) %>%
  na.omit() %>%
  filter(YR_BUILT < 3000)

# Histogram of construction years in 5-year bins.
gg_yr_built <- ggplot(year_built, mapping = aes(x = YR_BUILT)) +
  geom_histogram(binwidth = 5, colour = "black", fill = "lightblue") +
  scale_x_continuous(
    breaks = seq(from = 1600, to = 2050, by = 50),
    limits = c(1600, 2050)
  ) +
  scale_y_continuous(limits = c(0, 30000)) +
  labs(
    title = "Distribution of number of house built by year",
    x = "Year Built",
    y = "Number of houses"
  )

# Interactive version of the histogram.
plotly_hist <- ggplotly(gg_yr_built)
plotly_hist
# Narrative fragment from the report text: plotly() to visualise number of
# houses built in different years. We observe that maximum houses were built
# in the year 1990 with a count of 28,253.
# NOTE(review): summary() above shows the first quartile of YR_BUILT is 1900
# and the third quartile is 1964 - confirm whether "1990" should read "1900".

year_remodel <- subset(property_subset, select = YR_REMODEL)
# Omitting NA values
year_remodel <- na.omit(year_remodel)
# Some YR_REMODEL values are implausible (summary() above shows a minimum of
# 201 and a maximum of 20220), so keep only years strictly between 1500 and
# 2030.
year_remodel <- filter(year_remodel, YR_REMODEL < 2030 & YR_REMODEL > 1500)

# Histogram of remodel years in 5-year bins; the x axis is limited to
# 1900-2050.
gg_yr_remodel <- ggplot(year_remodel, aes(x = YR_REMODEL)) +
  geom_histogram(binwidth = 5, fill = "lightblue", color = "black") +
  labs(
    title = "Distribution of Year Remodeled",
    x = "Year Remodeled",
    y = "Frequency"
  ) +
  scale_x_continuous(
    limits = c(1900, 2050),
    breaks = seq(1900, 2050, by = 10)
  ) +
  scale_y_continuous(limits = c(0, 30000))

plotly_hist <- ggplotly(gg_yr_remodel)
plotly_hist
# Narrative fragment from the report text: ggplot() and plotly() and observed
# that maximum number of houses were remodeled in the year 2015 with a count
# of 13556.
# Narrative fragment from the report text: subset() since it may affect our
# visualisation.

# Removing unusually high values and zero values from the LAND_SF and
# LAND_VALUE
land_area <- subset(
  property_dataset,
  LAND_SF != 0 & LAND_VALUE != 0 &
    LAND_SF <= 95000000 & LAND_VALUE <= 400000000
)

# Calculating correlation coefficient to understand the strength of
# relationship between x axis and y axis
correlation_coefficient <- cor(land_area$LAND_SF, land_area$LAND_VALUE)

# Plotting Scatter plot using ggplot.
# The correlation label is a single fixed annotation, so it is added with
# annotate(); geom_text(aes(...)) with constant aesthetics would draw the same
# label once for every row of land_area.
ggplot(land_area, aes(x = LAND_SF, y = LAND_VALUE)) +
  geom_point(size = 1, color = "blue") +
  labs(
    title = "Scatter Plot of Land Area vs Land Value",
    x = "Land Area (Sq. Feet)",
    y = "Land Value"
  ) +
  theme_minimal() +
  scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::comma) +
  annotate(
    "text",
    x = 1000000,
    y = 350000000,
    label = paste("Correlation =", round(correlation_coefficient, 2))
  )
# Narrative fragment from the report text: ggplot() and have obtained a
# correlation coefficient = 0.44 which indicates moderate positive linear
# relationship between the two variables. This also indicates that price of
# land may depend on other variables apart from land area.

# Narrative fragment from the report text: CrossTable() to visualise relation
# between Cities and AC types
# Cross-tabulate city against AC type; only counts and row proportions are
# printed (table, column and chi-square proportions are switched off).
ac_type <- gmodels::CrossTable(
  property_subset$CITY,
  property_subset$AC_TYPE,
  prop.t = FALSE,
  prop.c = FALSE,
  prop.chisq = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## |-------------------------|
##
##
## Total Observations in Table: 180627
##
##
## | property_subset$AC_TYPE
## property_subset$CITY | | C - Central AC | D - Ductless AC | N - None | Row Total |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## | 3 | 2 | 0 | 0 | 5 |
## | 0.600 | 0.400 | 0.000 | 0.000 | 0.000 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## ALLSTON | 1031 | 834 | 21 | 2537 | 4423 |
## | 0.233 | 0.189 | 0.005 | 0.574 | 0.024 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## BOSTON | 15633 | 19368 | 216 | 11887 | 47104 |
## | 0.332 | 0.411 | 0.005 | 0.252 | 0.261 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## BRIGHTON | 1936 | 2565 | 62 | 7307 | 11870 |
## | 0.163 | 0.216 | 0.005 | 0.616 | 0.066 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## BROOKLINE | 8 | 2 | 0 | 14 | 24 |
## | 0.333 | 0.083 | 0.000 | 0.583 | 0.000 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## CHARLESTOWN | 1984 | 3244 | 69 | 1933 | 7230 |
## | 0.274 | 0.449 | 0.010 | 0.267 | 0.040 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## CHESTNUT HILL | 50 | 538 | 3 | 426 | 1017 |
## | 0.049 | 0.529 | 0.003 | 0.419 | 0.006 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## DEDHAM | 2 | 1 | 0 | 3 | 6 |
## | 0.333 | 0.167 | 0.000 | 0.500 | 0.000 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## DORCHESTER | 7276 | 4926 | 150 | 16860 | 29212 |
## | 0.249 | 0.169 | 0.005 | 0.577 | 0.162 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## EAST BOSTON | 2639 | 2929 | 108 | 4275 | 9951 |
## | 0.265 | 0.294 | 0.011 | 0.430 | 0.055 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## HYDE PARK | 1721 | 1488 | 32 | 5966 | 9207 |
## | 0.187 | 0.162 | 0.003 | 0.648 | 0.051 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## JAMAICA PLAIN | 2955 | 3282 | 91 | 5776 | 12104 |
## | 0.244 | 0.271 | 0.008 | 0.477 | 0.067 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## MATTAPAN | 1080 | 521 | 17 | 3229 | 4847 |
## | 0.223 | 0.107 | 0.004 | 0.666 | 0.027 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## NEWTON | 0 | 0 | 0 | 1 | 1 |
## | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## READVILLE | 2 | 0 | 0 | 0 | 2 |
## | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## ROSLINDALE | 1665 | 1791 | 79 | 5675 | 9210 |
## | 0.181 | 0.194 | 0.009 | 0.616 | 0.051 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## ROXBURY | 2560 | 1021 | 35 | 2552 | 6168 |
## | 0.415 | 0.166 | 0.006 | 0.414 | 0.034 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## ROXBURY CROSSING | 697 | 445 | 8 | 682 | 1832 |
## | 0.380 | 0.243 | 0.004 | 0.372 | 0.010 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## SOUTH BOSTON | 4493 | 6698 | 92 | 4156 | 15439 |
## | 0.291 | 0.434 | 0.006 | 0.269 | 0.085 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## WEST ROXBURY | 2124 | 2447 | 151 | 6253 | 10975 |
## | 0.194 | 0.223 | 0.014 | 0.570 | 0.061 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
## Column Total | 47859 | 52102 | 1134 | 79532 | 180627 |
## ---------------------|-----------------|-----------------|-----------------|-----------------|-----------------|
##
##
# We have reshaped the data into a long format using `melt()` to plot stacked
# bar graph.
# CrossTable() returns a list: the count table in `$t` plus proportion tables.
# Melting the whole list stacks the proportion tables underneath the counts,
# and the integer conversion below then turns a proportion of exactly 1 into
# a spurious extra count. Only the count table is melted here. The long-format
# columns `x` (city) and `y` (AC type) used below come from that table's
# dimension names.
ac_type_long <- melt(ac_type$t, value.name = "Count")

# Converting Count column to integer for better readability of the stacked
# bar chart
ac_type_long$Count <- as.integer(ac_type_long$Count)

# Filtering out blank values
ac_type_long <- filter(ac_type_long, y != "" & x != "")

# Using ggplot to plot stacked bar graph
gg_ac_type <- ggplot(ac_type_long, aes(x = x, y = Count, fill = y)) +
  geom_bar(stat = "identity") +
  labs(title = "Ac type counts by City", x = "City", y = "Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(
    breaks = seq(0, 30000, by = 4000),
    limits = c(0, 45000)
  )

# Converting ggplot to plotly
plotly_plot <- ggplotly(gg_ac_type)
# Show the interactive plotly plot
plotly_plot
# Narrative fragment from the report text: ggplot() and plotly() and observed
# that maximum houses have C - Central AC and Boston has highest count of
# C - Central AC with a figure of 19,368. It can also be observed that
# D - Ductless AC is not very common among the cities in our dataset.
# NOTE(review): the CrossTable column totals printed above show N - None
# (79,532) exceeding C - Central AC (52,102); confirm the first claim.

# Narrative fragment from the report text: CrossTable() to visualise relation
# between Cities and Heat types
# Cross-tabulate city against heat type; only counts and row proportions are
# printed (table, column and chi-square proportions are switched off).
heat_type <- gmodels::CrossTable(
  property_subset$CITY,
  property_subset$HEAT_TYPE,
  prop.t = FALSE,
  prop.c = FALSE,
  prop.chisq = FALSE
)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## |-------------------------|
##
##
## Total Observations in Table: 180627
##
##
## | property_subset$HEAT_TYPE
## property_subset$CITY | | E - Electric | F - Forced Hot Air | N - None | O - Other | P - Heat Pump | S - Space Heat | W - Ht Water/Steam | Row Total |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## | 3 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | 5 |
## | 0.600 | 0.000 | 0.400 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## ALLSTON | 1031 | 68 | 973 | 1 | 1 | 39 | 16 | 2294 | 4423 |
## | 0.233 | 0.015 | 0.220 | 0.000 | 0.000 | 0.009 | 0.004 | 0.519 | 0.024 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## BOSTON | 15628 | 2562 | 12040 | 4 | 5 | 2960 | 71 | 13834 | 47104 |
## | 0.332 | 0.054 | 0.256 | 0.000 | 0.000 | 0.063 | 0.002 | 0.294 | 0.261 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## BRIGHTON | 1937 | 281 | 2512 | 2 | 1 | 184 | 6 | 6947 | 11870 |
## | 0.163 | 0.024 | 0.212 | 0.000 | 0.000 | 0.016 | 0.001 | 0.585 | 0.066 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## BROOKLINE | 8 | 0 | 2 | 0 | 0 | 0 | 0 | 14 | 24 |
## | 0.333 | 0.000 | 0.083 | 0.000 | 0.000 | 0.000 | 0.000 | 0.583 | 0.000 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## CHARLESTOWN | 1983 | 259 | 2518 | 0 | 1 | 485 | 23 | 1961 | 7230 |
## | 0.274 | 0.036 | 0.348 | 0.000 | 0.000 | 0.067 | 0.003 | 0.271 | 0.040 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## CHESTNUT HILL | 50 | 1 | 415 | 0 | 0 | 1 | 0 | 550 | 1017 |
## | 0.049 | 0.001 | 0.408 | 0.000 | 0.000 | 0.001 | 0.000 | 0.541 | 0.006 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## DEDHAM | 2 | 0 | 1 | 0 | 0 | 0 | 0 | 3 | 6 |
## | 0.333 | 0.000 | 0.167 | 0.000 | 0.000 | 0.000 | 0.000 | 0.500 | 0.000 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## DORCHESTER | 7273 | 654 | 8271 | 27 | 3 | 115 | 55 | 12814 | 29212 |
## | 0.249 | 0.022 | 0.283 | 0.001 | 0.000 | 0.004 | 0.002 | 0.439 | 0.162 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## EAST BOSTON | 2639 | 455 | 3086 | 14 | 1 | 62 | 261 | 3433 | 9951 |
## | 0.265 | 0.046 | 0.310 | 0.001 | 0.000 | 0.006 | 0.026 | 0.345 | 0.055 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## HYDE PARK | 1721 | 259 | 2749 | 10 | 5 | 18 | 12 | 4433 | 9207 |
## | 0.187 | 0.028 | 0.299 | 0.001 | 0.001 | 0.002 | 0.001 | 0.481 | 0.051 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## JAMAICA PLAIN | 2954 | 256 | 3379 | 4 | 7 | 213 | 22 | 5269 | 12104 |
## | 0.244 | 0.021 | 0.279 | 0.000 | 0.001 | 0.018 | 0.002 | 0.435 | 0.067 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## MATTAPAN | 1080 | 93 | 1269 | 6 | 0 | 1 | 6 | 2392 | 4847 |
## | 0.223 | 0.019 | 0.262 | 0.001 | 0.000 | 0.000 | 0.001 | 0.494 | 0.027 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## NEWTON | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 |
## | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 1.000 | 0.000 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## READVILLE | 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 |
## | 1.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## ROSLINDALE | 1661 | 196 | 2322 | 4 | 2 | 148 | 1 | 4876 | 9210 |
## | 0.180 | 0.021 | 0.252 | 0.000 | 0.000 | 0.016 | 0.000 | 0.529 | 0.051 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## ROXBURY | 2560 | 181 | 1490 | 2 | 2 | 34 | 25 | 1874 | 6168 |
## | 0.415 | 0.029 | 0.242 | 0.000 | 0.000 | 0.006 | 0.004 | 0.304 | 0.034 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## ROXBURY CROSSING | 697 | 61 | 343 | 1 | 0 | 134 | 5 | 591 | 1832 |
## | 0.380 | 0.033 | 0.187 | 0.001 | 0.000 | 0.073 | 0.003 | 0.323 | 0.010 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## SOUTH BOSTON | 4492 | 465 | 6429 | 1 | 10 | 101 | 171 | 3770 | 15439 |
## | 0.291 | 0.030 | 0.416 | 0.000 | 0.001 | 0.007 | 0.011 | 0.244 | 0.085 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## WEST ROXBURY | 2124 | 112 | 2000 | 13 | 3 | 124 | 3 | 6596 | 10975 |
## | 0.194 | 0.010 | 0.182 | 0.001 | 0.000 | 0.011 | 0.000 | 0.601 | 0.061 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
## Column Total | 47845 | 5903 | 49801 | 89 | 41 | 4619 | 677 | 71652 | 180627 |
## ---------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|--------------------|
##
##
# We have reshaped the data into a long format using `melt()` to plot stacked
# bar graph.
# CrossTable() returns a list: the count table in `$t` plus proportion tables.
# Melting the whole list stacks the proportion tables underneath the counts,
# and the integer conversion below then turns a proportion of exactly 1 into
# a spurious extra count. Only the count table is melted here. The long-format
# columns `x` (city) and `y` (heat type) used below come from that table's
# dimension names.
heat_type_long <- melt(heat_type$t, value.name = "Count")

# Converting Count column to integer for better readability of the stacked
# bar chart
heat_type_long$Count <- as.integer(heat_type_long$Count)

# Filtering out blank values
heat_type_long <- filter(heat_type_long, y != "" & x != "")

# Using ggplot to plot stacked bar graph
gg_heat_type <- ggplot(heat_type_long, aes(x = x, y = Count, fill = y)) +
  geom_bar(stat = "identity") +
  labs(title = "Heat type counts by City", x = "City", y = "Count") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_y_continuous(
    breaks = seq(0, 40000, by = 5000),
    limits = c(0, 45000)
  )

# Converting ggplot to plotly
plotly_plot <- ggplotly(gg_heat_type)
# Show the interactive plotly plot
plotly_plot
Plotted with ggplot() and plotly(); we observed that the most common heating
types are F - Forced Hot Air and W - Ht Water/Steam.